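# Title     : Annotate metabolite table with KEGG/HMDB information
# Objective : Join the (optionally normalized) metabolite table with keggInfo.txt
#             by lower-cased metabolite name and write
#             03.2_AllMet_with_Raw_Metabolite.csv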
# Created by: Administrator
# Created on: 2019/8/12
library(optparse)
library(magrittr)
library(tidyverse)

# Command-line interface.
#   --i      : raw metabolite data file (default "AllMet1.csv").
#              NOTE(review): the visible part of this script never reads opt$i;
#              the input tables are loaded from fixed file names further down.
#   --config : two-column config file (default "config.csv").
option_list <- list(
  make_option(
    "--i",
    type = "character",
    default = "AllMet1.csv",
    help = "raw metabolite data file"
  ),
  make_option(
    "--config",
    type = "character",
    default = "config.csv",
    help = "config file"
  )
)
parser <- OptionParser(option_list = option_list)
opt <- parse_args(parser)

# Read the tab-separated config file as (arg, value) pairs.
# Every column is forced to character: with readr's type guessing, a value
# column that only contains T/F is parsed as logical, and the comparison with
# the string "T" below would then never be TRUE.
configData <- read_tsv(
  opt$config,
  col_types = cols(.default = col_character())
) %>%
  set_colnames(c("arg", "value"))

# Scalar flag: TRUE only when the first "isNormal" entry is exactly "T".
# A missing entry or an NA value gives FALSE instead of a zero-length / NA
# condition that would make the later `if (isNormal)` fail.
isNormalValue <- configData %>%
  filter(arg == "isNormal") %>%
  pull(value)
isNormal <- isTRUE(isNormalValue[1] == "T")

# Load the metabolite table to annotate: the normalized-area table when
# isNormal is set, otherwise the missing-value-filled raw table.
# The path is chosen first so that only the file actually needed is read
# (and only that file has to exist).
rawDataFile <- if (isNormal) {
  "03_AllMet_Raw_NormArea.csv"
} else {
  "02_AllMet_Raw_Missing_Value_Filled.csv"
}
rawData <- read_csv(rawDataFile)

# Reference annotation table: keep only the five columns used downstream,
# in this order.
keggInfoData <- read_tsv("keggInfo.txt")
keggInfoData <- select(
  keggInfoData,
  Class, HMDB, KEGG, Raw_Metabolite, Metabolite
)

# Printed for a quick sanity check of how many reference rows were loaded.
nrow(keggInfoData)

# Everything in rawData except the metabolite name column.
rawDataColumn <- select(rawData, -Raw_Metabolite)

# Reference table keyed by the lower-cased raw metabolite name; the original
# Raw_Metabolite column is dropped so only the join key `lowerName` remains.
lowerKeggData <- keggInfoData %>%
  mutate(
    lowerName = tolower(Raw_Metabolite),
    Raw_Metabolite = NULL
  )

# Output column order: annotation columns first, then the remaining rawData
# columns in their original order.
finalColumnNames <- c(
  "Class", "HMDB", "KEGG", "Raw_Metabolite", "Metabolite",
  names(rawDataColumn)
)

# Row counts printed for a quick sanity check before the join.
nrow(rawData)
nrow(lowerKeggData)

# Annotate every row of rawData with the reference information in
# lowerKeggData, matching on the lower-cased metabolite name.
#
# Steps:
#   1. Build the join key `lowerName` from a re-encoded copy of Raw_Metabolite
#      (enc2utf8() + iconv(sub = "byte") substitutes bytes that cannot be
#      converted). The copy lives in a scratch `Metabolite` column that is
#      dropped again before the join, so the only `Metabolite` column after
#      the join is the one coming from lowerKeggData.
#   2. Left-join the reference table and reorder columns to finalColumnNames.
#      all_of() makes the use of an external character vector explicit and
#      errors if any listed column is missing.
#   3. Fill gaps for unmatched rows: Class becomes "Unknown" and Metabolite
#      falls back to Raw_Metabolite. This is done column-wise with
#      replace_na()/coalesce() rather than one row at a time.
outData <- rawData %>%
  mutate(
    Metabolite = iconv(enc2utf8(Raw_Metabolite), sub = "byte"),
    lowerName = tolower(Metabolite)
  ) %>%
  select(-Metabolite) %>%
  left_join(lowerKeggData, by = "lowerName") %>%
  select(-lowerName) %>%
  select(all_of(finalColumnNames)) %>%
  mutate(
    Class = replace_na(Class, "Unknown"),
    Metabolite = coalesce(Metabolite, Raw_Metabolite)
  )

# Printed so the annotated table can be inspected in the run log.
outData

write.csv(outData, "03.2_AllMet_with_Raw_Metabolite.csv", row.names = FALSE)


